---
title: "QTA Final Assignment"
author: "Samuel Johnston"
date: "12/12/2019"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

```{r cars}
summary(cars)
```

## Including Plots

You can also embed plots, for example:

```{r pressure, echo=FALSE}
plot(pressure)
```

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.


```{r}
#To load all the libraries we need
install.packages("") #to install the packages you need

library(dplyr)
library(quanteda)
library(stopwords)
library(SnowballC)
library(lubridate)
library(stm)
library(topicmodels)
library(igraph)
library(plm)
library(lmtest)
library(stargazer)
library(lme4)
library(reshape2)
```


```{r}
#To load the ParlSpeech dataset for the Netherlands, write it to csv, and then to turn it into a corpus:
tkcorpus <- readRDS("/Users/samueljohnston/Documents/R Code/GL and RRPs/Corp_TweedeKamer_V2.rds") 

#To remove all years prior to the radical right getting represented in the Tweede Kamer:
tkcorpus2 <- tkcorpus %>% filter(tkcorpus$date >= "2002-05-01")

#Now to split this dataset into 4 different datasets: GroenLinks (hereafter GL); radical right (hereafter RR); left-wing parties (hereafter left), which includes SP, PvdA, D66, and DENK; and right-wing parties (hereafter right), which includes CDA, CU, VVD, and SGP: 
gl.corpus <- tkcorpus2[ which(tkcorpus2$party == "GL"), ]
rr.corpus <- tkcorpus2[ which(tkcorpus2$party %in% c("LPF", "PVV", "FvD")), ]
left.corpus <- tkcorpus2[ which(tkcorpus2$party %in% c("SP", "PvdA", "D66", "DENK")), ]
right.corpus <- tkcorpus2[ which(tkcorpus2$party %in% c("CDA", "CU", "VVD", "SGP")), ]

#To convert all 4 of these into corpora: 
gl_corpus <- corpus(gl.corpus, text_field = "text")
rr_corpus <- corpus(rr.corpus, text_field = "text")
left_corpus <- corpus(left.corpus, text_field = "text") 
right_corpus <- corpus(right.corpus, text_field = "text") 
```


```{r}
#Let's begin with the GL corpus.
#Firstly, let's create two functions, the first carries out basic cleaning (keeping document variables, lowercasing, and removing stopwords), and the second for finding unsupervised collocations, where words in the pre-processed text have to co-occur at least 20 times to be counted as a collocation:
toks <- tokens(gl_corpus, include_docvars = TRUE) %>%
               tokens_tolower() %>% 
               tokens_remove(stopwords("dutch"), padding = TRUE) 

get_coll <-function(tokens){
  unsup_col <- textstat_collocations(tokens, method = "lambda",size = 2, min_count = 20, smoothing = 0.5)
  unsup_col <- unsup_col[order(-unsup_col$count),] #sort detected collocations by count (descending)
  return(unsup_col)}

#We can apply this second function to the corpus to carry out the prediction and replacement of collocations:
collocations <- get_coll(toks) #This applies the second function to get the collocations in the yelp corpus
toks <- tokens_compound(toks, pattern = collocations[collocations$z > 20]) #To replace the collocations in the corpus

#Now that we have done some basic cleaning, carried out unsupervised collocation, we can also remove whitespace, numbers, punctuation, symbols, hyphens, and separators from the corpus, so that we are just left with words and collocations as tokens: 
toks <- tokens_remove(tokens(toks), "") #This removes the whitespace placeholders
toks <- tokens(toks, remove_numbers = TRUE, remove_punct = TRUE, remove_symbols = TRUE, split_hyphens = TRUE,                                       remove_separators = TRUE) 


#Secondly, to generate the DFM (gl_dfm) and to trim it by stemming and removing any tokens that occur less than 20 times or in less than 20 documents:
gl_dfm <- dfm(toks)
gl_dfm <- dfm_wordstem(gl_dfm, language = "dutch") %>%
          dfm_trim(min_termfreq = 20, min_docfreq = 20) 

#Thirdly, to parse dates within DFM docvars:
gl_dfm@docvars$ymd <- ymd(gl_dfm@docvars$date)
gl_dfm@docvars$date_month <- floor_date(gl_dfm@docvars$ymd, "month")
gl_dfm@docvars$date_month <- as.numeric(gl_dfm@docvars$date_month)


#Fourthly, to aggregate it up to the speaker-month (sm) level: 
gl_dfm@docvars$sm <- paste(gl_dfm@docvars$speaker, gl_dfm@docvars$date_month, sep = "")

#Fifthly, to attach the original text to the DFM: 
gl_dfm@docvars$original_text <- gl.corpus$text

#to remove any NAs:
na.omit(gl_dfm@docvars$speaker)
na.omit(gl_dfm@docvars$date_month)

#Finally, to save it to disk and load it:
saveRDS(gl_dfm, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/gldfm.rds")
gl_dfm <- readRDS("/Users/samueljohnston/Documents/R Code/GL and RRPs/gldfm.rds")
```


```{r}
#This section is for creating the RR dfm. 

#Firstly, let's create two functions, the first carries out basic cleaning (keeping document variables, lowercasing, and removing stopwords), and the second for finding unsupervised collocations, where words in the pre-processed text have to co-occur at least 20 times to be counted as a collocation:
toks <- tokens(rr_corpus, include_docvars = TRUE) %>%
               tokens_tolower() %>% 
               tokens_remove(stopwords("dutch"), padding = TRUE) 

get_coll <-function(tokens){
  unsup_col <- textstat_collocations(tokens, method = "lambda",size = 2, min_count = 20, smoothing = 0.5)
  unsup_col <- unsup_col[order(-unsup_col$count),] #sort detected collocations by count (descending)
  return(unsup_col)}

#We can apply this second function to the corpus to carry out the prediction and replacement of collocations:
collocations <- get_coll(toks) #This applies the second function to get the collocations in the yelp corpus
toks <- tokens_compound(toks, pattern = collocations[collocations$z > 20]) #To replace the collocations in the corpus

#Now that we have done some basic cleaning, carried out unsupervised collocation, we can also remove whitespace, numbers, punctuation, symbols, hyphens, and separators from the corpus, so that we are just left with words and collocations as tokens: 
toks <- tokens_remove(tokens(toks), "") #This removes the whitespace placeholders
toks <- tokens(toks, remove_numbers = TRUE, remove_punct = TRUE, remove_symbols = TRUE, split_hyphens = TRUE,                                       remove_separators = TRUE) 


#Secondly, to generate the DFM (rr_dfm) and to trim it by stemming and removing any tokens that occur less than 20 times or in less than 20 documents:
rr_dfm <- dfm(toks)
rr_dfm <- dfm_wordstem(rr_dfm, language = "dutch") %>%
          dfm_trim(min_termfreq = 20, min_docfreq = 20)


#Thirdly, to parse dates within DFM docvars:
rr_dfm@docvars$ymd <- ymd(rr_dfm@docvars$date)
rr_dfm@docvars$date_month <- floor_date(rr_dfm@docvars$ymd, "month")
rr_dfm@docvars$date_month <- as.numeric(rr_dfm@docvars$date_month)


#Fourthly, to attach the original text to the DFM: 
rr_dfm@docvars$original_text <- rr.corpus$text

#to remove any NAs:
na.omit(rr_dfm@docvars$speaker)
na.omit(rr_dfm@docvars$date_month)

#Finally, to save it to disk and load it:
saveRDS(rr_dfm, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/rrdfm.rds")
rr_dfm <- readRDS("/Users/samueljohnston/Documents/R Code/GL and RRPs/rrdfm.rds")
```


```{r}
#This section is for creating the left dfm. 

#Firstly, let's create two functions, the first carries out basic cleaning (keeping document variables, lowercasing, and removing stopwords), and the second for finding unsupervised collocations, where words in the pre-processed text have to co-occur at least 20 times to be counted as a collocation:
toks <- tokens(left_corpus, include_docvars = TRUE) %>%
               tokens_tolower() %>% 
               tokens_remove(stopwords("dutch"), padding = TRUE) 

get_coll <-function(tokens){
  unsup_col <- textstat_collocations(tokens, method = "lambda",size = 2, min_count = 20, smoothing = 0.5)
  unsup_col <- unsup_col[order(-unsup_col$count),] #sort detected collocations by count (descending)
  return(unsup_col)}

#We can apply this second function to the corpus to carry out the prediction and replacement of collocations:
collocations <- get_coll(toks) #This applies the second function to get the collocations in the yelp corpus
toks <- tokens_compound(toks, pattern = collocations[collocations$z > 20]) #To replace the collocations in the corpus

#Now that we have done some basic cleaning, carried out unsupervised collocation, we can also remove whitespace, numbers, punctuation, symbols, hyphens, and separators from the corpus, so that we are just left with words and collocations as tokens: 
toks <- tokens_remove(tokens(toks), "") #This removes the whitespace placeholders
toks <- tokens(toks, remove_numbers = TRUE, remove_punct = TRUE, remove_symbols = TRUE, split_hyphens = TRUE,                                       remove_separators = TRUE) 


#Secondly, to generate the DFM (left_dfm) and to trim it by stemming and removing any tokens that occur less than 20 times or in less than 20 documents:
left_dfm <- dfm(toks)
left_dfm <- dfm_wordstem(left_dfm, language = "dutch") %>%
          dfm_trim(min_termfreq = 20, min_docfreq = 20)


#Thirdly, to parse dates within DFM docvars:
left_dfm@docvars$ymd <- ymd(left_dfm@docvars$date)
left_dfm@docvars$date_month <- floor_date(left_dfm@docvars$ymd, "month")
left_dfm@docvars$date_month <- as.numeric(left_dfm@docvars$date_month)


#Fourthly, to attach the original text to the DFM: 
left_dfm@docvars$original_text <- left.corpus$text

#to remove any NAs:
na.omit(left_dfm@docvars$speaker)
na.omit(left_dfm@docvars$date_month)

#Finally, to save it to disk and load it:
saveRDS(left_dfm, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/leftdfm.rds")
left_dfm <- readRDS("/Users/samueljohnston/Documents/R Code/GL and RRPs/leftdfm.rds")
```

```{r}
#This section is for creating the right dfm. 

#Firstly, let's create two functions, the first carries out basic cleaning (keeping document variables, lowercasing, and removing stopwords), and the second for finding unsupervised collocations, where words in the pre-processed text have to co-occur at least 20 times to be counted as a collocation:
toks <- tokens(right_corpus, include_docvars = TRUE) %>%
               tokens_tolower() %>% 
               tokens_remove(stopwords("dutch"), padding = TRUE) 

get_coll <-function(tokens){
  unsup_col <- textstat_collocations(tokens, method = "lambda",size = 2, min_count = 20, smoothing = 0.5)
  unsup_col <- unsup_col[order(-unsup_col$count),] #sort detected collocations by count (descending)
  return(unsup_col)}

#We can apply this second function to the corpus to carry out the prediction and replacement of collocations:
collocations <- get_coll(toks) #This applies the second function to get the collocations in the yelp corpus
toks <- tokens_compound(toks, pattern = collocations[collocations$z > 20]) #To replace the collocations in the corpus

#Now that we have done some basic cleaning, carried out unsupervised collocation, we can also remove whitespace, numbers, punctuation, symbols, hyphens, and separators from the corpus, so that we are just left with words and collocations as tokens: 
toks <- tokens_remove(tokens(toks), "") #This removes the whitespace placeholders
toks <- tokens(toks, remove_numbers = TRUE, remove_punct = TRUE, remove_symbols = TRUE, split_hyphens = TRUE,                                       remove_separators = TRUE) 


#Secondly, to generate the DFM (right_dfm) and to trim it by stemming and removing any tokens that occur less than 20 times or in less than 20 documents:
right_dfm <- dfm(toks)
right_dfm <- dfm_wordstem(right_dfm, language = "dutch") %>%
          dfm_trim(min_termfreq = 20, min_docfreq = 20)


#Thirdly, to parse dates within DFM docvars:
right_dfm@docvars$ymd <- ymd(right_dfm@docvars$date)
right_dfm@docvars$date_month <- floor_date(right_dfm@docvars$ymd, "month")
right_dfm@docvars$date_month <- as.numeric(right_dfm@docvars$date_month)


#Fourthly, to attach the original text to the DFM: 
right_dfm@docvars$original_text <- right.corpus$text

#to remove any NAs:
na.omit(right_dfm@docvars$speaker)
na.omit(right_dfm@docvars$date_month)

#Finally, to save it to disk and load it:
saveRDS(right_dfm, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/rightdfm.rds")
right_dfm <- readRDS("/Users/samueljohnston/Documents/R Code/GL and RRPs/rightdfm.rds")
```


```{r}
#1: GL STM. Firstly, in order to convert the DFM into STM (which helps with memory):
gl_stmdfm <- convert(gl_dfm, to = "stm") 
gl_stmdfm$meta$date_month <- floor_date(gl_stmdfm$meta$ymd, "month")


#Secondly, in order to fit an STM model with k=50 topics, 500 iterations for convergence (the max.em.its argument), and seed set to enable replicability:
modelFit3 <- stm(documents = gl_stmdfm$documents, vocab = gl_stmdfm$vocab, data = gl_stmdfm$meta, K=50, max.em.its=500, seed=1234, prevalence = ~ speaker + s(date_month))

#To save the estimated model to disk and then to load it:
save(modelFit3, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/gl_modelfit")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/gl_modelfit")

#To determine what the topics are:
labelTopics(modelFit3)
findThoughts(modelFit3, texts = gl_stmdfm$meta$original_text, topics = 21, n = 20) #use this whenever we are unsure about a topic to examine 20 articles


#Now, to create a dataset based on this DV. Firstly, we find the theta for both topics 1 (integration) and 21 (control), sorting them by month and speaker, resulting in 1438 observations. We then turn this into a dataframe (gl_data), and construct two GL immigration prevalence variables (Integration and Control) by combining the thetas for both topics: 

theta_topic <- modelFit3$theta[, c(1, 21)] #get the thetas for topics 1 and 21 (on integration and control, respectively)

agg_theta <- aggregate(theta_topic, by = list(month = gl_stmdfm$meta$date_month, speaker = gl_stmdfm$meta$speaker), FUN = mean)

gl_data <- as.data.frame(agg_theta) #create the dataframe
gl_data$Integration <- (gl_data$V1)*100 #create the immigration integration variable as a percentage
gl_data$Control <- (gl_data$V2)*100 #create the immigration control variable as a percentage

#Now, to write it to CSV: 
write.csv(gl_data, "/Users/samueljohnston/Documents/R Code/GL and RRPs/gl_data.csv")


#For a robustness check, we also run this for k=40 and 60 topics: 
modelFit1 <- stm(documents = gl_stmdfm$documents, vocab = gl_stmdfm$vocab, data = gl_stmdfm$meta, K=40, max.em.its=500, seed=1234, prevalence = ~ speaker + s(date_month))

save(modelFit1, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/gl_modelfit1")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/gl_modelfit1")

labelTopics(modelFit1)
findThoughts(modelFit1, texts = gl_stmdfm$meta$original_text, topics = 58, n = 20) #use this whenever we are unsure about a topic to examine 20 articles


modelFit2 <- stm(documents = gl_stmdfm$documents, vocab = gl_stmdfm$vocab, data = gl_stmdfm$meta, K=60, max.em.its=500, seed=1234, prevalence = ~ speaker + s(date_month))

save(modelFit2, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/gl_modelfit2")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/gl_modelfit2")

labelTopics(modelFit2)
findThoughts(modelFit2, texts = gl_stmdfm$meta$original_text, topics = 58, n = 20) #use this whenever we are unsure about a topic to examine 20 articles


#For k=40, we get three topics: topic 1 is on integration,and topics 6 and 21 are on control. 
theta_topic <- modelFit1$theta[, c(1, 6, 21)]
agg_theta <- aggregate(theta_topic, by = list(month = gl_stmdfm$meta$date_month, speaker = gl_stmdfm$meta$speaker), FUN = mean)

#For k=60, we get three topics: topic 6 is on control, topic 58 is on integration, and topic 22 is a robustness check for Hirsi Ali: 
theta_topic2 <- modelFit2$theta[, c(6, 58, 22)]
agg_theta2 <- aggregate(theta_topic2, by = list(month = gl_stmdfm$meta$date_month, speaker = gl_stmdfm$meta$speaker), FUN = mean)

#Now to create a dataframe including the topics from both STMs:
gl_data2 <- as.data.frame(agg_theta) #create the dataframe
gl_data2$FortyIntegration <- (gl_data2$V1)*100 #create the immigration integration variable as a percentage
gl_data2$FortyControl <- (gl_data2$V2 + gl_data2$V3)*100 #create the immigration control variable as a percentage
write.csv(gl_data2, "/Users/samueljohnston/Documents/R Code/GL and RRPs/gl_data2.csv")


gl_data3 <- as.data.frame(agg_theta2)
gl_data3$SixtyControl <- (gl_data3$V1)*100 
gl_data3$SixtyIntegration <- (gl_data3$V2)*100 
write.csv(gl_data3, "/Users/samueljohnston/Documents/R Code/GL and RRPs/gl_data3.csv")
```

```{r}
#2: RR STM. Firstly, in order to convert the DFM into STM (which helps with memory):
rr_stmdfm <- convert(rr_dfm, to = "stm") 
rr_stmdfm$meta$date_month <- floor_date(rr_stmdfm$meta$ymd, "month")


#Secondly, in order to fit an STM model with k=50 topics, 500 iterations for convergence (the max.em.its argument), and seed set to enable replicability:
rrmodelFit <- stm(documents = rr_stmdfm$documents, vocab = rr_stmdfm$vocab, K=50, max.em.its=500, seed=1234, data = rr_stmdfm$meta, prevalence = ~ speaker + s(date_month))

#Finally, to save the estimated model to disk and then to load it:
save(rrmodelFit, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/rr_modelfit")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/rr_modelfit")

#To determine what the topics are:
labelTopics(rrmodelFit)
findThoughts(rrmodelFit, texts = rr_stmdfm$meta$original_text, topics = 47, n = 20) #use this whenever we are unsure about a topic to examine 20 articles

#Now we find the theta for topics 22 (control), 4, 34, and 47 (the 3 on integration), and 1 (a control robustness check), sorting them by month. We then add this to the dataset: 
rr_theta_topic <- rrmodelFit$theta[,c(1, 4, 22, 34, 47)] #get the thetas for the topics
rr_theta <- aggregate(rr_theta_topic, by = list(month = rr_stmdfm$meta$date_month),FUN = mean)
rr_theta <- rr_theta[order(rr_theta$month), ] #order the aggregated data by month, in ascending order (i.e., from 2002 to 2019)
rr_theta$Control <- (rr_theta$V3)*100 #create the immigration prevalence variable as a percentage
rr_theta$Integration <- (rr_theta$V2 + rr_theta$V4 + rr_theta$V5)*100 #create the immigration prevalence variable as a percentage
rr_theta$Topic1 <- (rr_theta$V1)*100 #create the prevalence variable for topic 1 as a percentage

#to export it as a csv so we can add it to the overall dataset:
write.csv(rr_theta, "/Users/samueljohnston/Documents/R Code/GL and RRPs/rr_data.csv")


#For a robustness check, we also run this for k=40 and 60 topics: 
rrmodelFit2 <- stm(documents = rr_stmdfm$documents, vocab = rr_stmdfm$vocab, K=40, max.em.its=500, seed=1234, data = rr_stmdfm$meta, prevalence = ~ speaker + s(date_month))

save(rrmodelFit2, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/rr_modelfit2")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/rr_modelfit2")

labelTopics(rrmodelFit2)
findThoughts(rrmodelFit2, texts = rr_stmdfm$meta$original_text, topics = 34, n = 20) #use this whenever we are unsure about a topic to examine 20 articles


rrmodelFit3 <- stm(documents = rr_stmdfm$documents, vocab = rr_stmdfm$vocab, K=60, max.em.its=500, seed=1234, data = rr_stmdfm$meta, prevalence = ~ speaker + s(date_month))

save(rrmodelFit3, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/rr_modelfit3")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/rr_modelfit3")

labelTopics(rrmodelFit3)
findThoughts(rrmodelFit3, texts = rr_stmdfm$meta$original_text, topics = 20, n = 20) #use this whenever we are unsure about a topic to examine 20 articles


#For k=40 we get three topics: topics 20 and 22 are on control, and topic 26 is on integration. 
rr_theta_topic2 <- rrmodelFit2$theta[,c(20, 22, 26)] #get the thetas for the topics
rr_theta2 <- aggregate(rr_theta_topic2, by = list(month = rr_stmdfm$meta$date_month),FUN = mean)
rr_theta2 <- rr_theta2[order(rr_theta2$month), ] #order the aggregated data by month, in ascending order (i.e., from 2002 to 2019)
rr_theta2$Control <- (rr_theta2$V1 + rr_theta2$V2)*100 
rr_theta2$Integration <- (rr_theta2$V3)*100
write.csv(rr_theta2, "/Users/samueljohnston/Documents/R Code/GL and RRPs/rr_data2.csv")

#For k=60 we get four topics: topics 1 and 20 are on control, and topics 4 and 13 are on integration. 
rr_theta_topic3 <- rrmodelFit3$theta[,c(1, 20, 4, 13)] #get the thetas for the topics
rr_theta3 <- aggregate(rr_theta_topic3, by = list(month = rr_stmdfm$meta$date_month),FUN = mean)
rr_theta3 <- rr_theta3[order(rr_theta3$month), ] #order the aggregated data by month, in ascending order (i.e., from 2002 to 2019)
rr_theta3$Control <- (rr_theta3$V1 + rr_theta3$V2)*100 
rr_theta3$Integration <- (rr_theta3$V3 + rr_theta3$V4)*100
write.csv(rr_theta3, "/Users/samueljohnston/Documents/R Code/GL and RRPs/rr_data3.csv")
```

```{r}
#3: Left STM. Firstly, in order to convert the DFM into STM (which helps with memory):
left_stmdfm <- convert(left_dfm, to = "stm")
left_stmdfm$meta$date_month <- floor_date(left_stmdfm$meta$ymd, "month")


#Secondly, in order to fit an STM model with k=50 topics, 500 iterations for convergence (the max.em.its argument), and seed set to enable replicability:
leftmodelFit <- stm(documents = left_stmdfm$documents, vocab = left_stmdfm$vocab, K=50, max.em.its=500, seed=1234, data = left_stmdfm$meta, prevalence = ~ speaker + s(date_month))

#Finally, to save the estimated model to disk and then to load it:
save(leftmodelFit, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/left_modelfit")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/left_modelfit")

#To determine what the topics are:
labelTopics(leftmodelFit)
findThoughts(leftmodelFit, texts = left_stmdfm$meta$original_text, topics = 46, n = 20) #use this whenever we are unsure about a topic to examine 20 articles


#For a robustness check, we also run this for k=40 and 60 topics: 
leftmodelFit2 <- stm(documents = left_stmdfm$documents, vocab = left_stmdfm$vocab, K=40, max.em.its=500, seed=1234, data = left_stmdfm$meta, prevalence = ~ speaker + s(date_month))

save(leftmodelFit2, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/left_modelfit2")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/left_modelfit2")

labelTopics(leftmodelFit2)
findThoughts(leftmodelFit2, texts = left_stmdfm$meta$original_text, topics = 26, n = 20) #use this whenever we are unsure about a topic to examine 20 articles

leftmodelFit3 <- stm(documents = left_stmdfm$documents, vocab = left_stmdfm$vocab, K=60, max.em.its=500, seed=1234, data = left_stmdfm$meta, prevalence = ~ speaker + s(date_month))

save(leftmodelFit3, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/left_modelfit3")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/left_modelfit3")

labelTopics(leftmodelFit3)
findThoughts(leftmodelFit3, texts = left_stmdfm$meta$original_text, topics = 46, n = 20) #use this whenever we are unsure about a topic to examine 20 articles

#Now we find the thetas for the following topics: 46 (integration) for k=50; 24 (immigration overall) for k=40; and topics 24 (control) and 46 (integration) for k=60. Topics are sorted by month. We then add this to the dataset: 
left_theta_topic <- leftmodelFit$theta[,c(46)] #get the thetas for the topics. K=50 first
left_theta <- aggregate(left_theta_topic, by = list(month = left_stmdfm$meta$date_month),FUN = mean)
left_theta <- left_theta[order(left_theta$month), ] 
left_theta$Integration <- left_theta$x*100 
write.csv(left_theta, "/Users/samueljohnston/Documents/R Code/GL and RRPs/left_data.csv")

left_theta_topic2 <- leftmodelFit2$theta[,c(24)] #K=40.
left_theta2 <- aggregate(left_theta_topic2, by = list(month = left_stmdfm$meta$date_month),FUN = mean)
left_theta2 <- left_theta2[order(left_theta2$month), ] 
left_theta2$Immigration <- left_theta2$x*100 
write.csv(left_theta2, "/Users/samueljohnston/Documents/R Code/GL and RRPs/left_data2.csv")

left_theta_topic3 <- leftmodelFit3$theta[,c(24, 46)] #K=60
left_theta3 <- aggregate(left_theta_topic3, by = list(month = left_stmdfm$meta$date_month),FUN = mean)
left_theta3 <- left_theta3[order(left_theta3$month), ] 
left_theta3$Control <- (left_theta3$V1)*100 
left_theta3$Integration <- (left_theta3$V2)*100
write.csv(left_theta3, "/Users/samueljohnston/Documents/R Code/GL and RRPs/left_data3.csv")
```

```{r}
#3: Right STM. Firstly, in order to convert the DFM into STM (which helps with memory):
right_stmdfm <- convert(right_dfm, to = "stm")
right_stmdfm$meta$date_month <- floor_date(right_stmdfm$meta$ymd, "month")


#Secondly, in order to fit an STM model with k=50 topics, 500 iterations for convergence (the max.em.its argument), and seed set to enable replicability:
rightmodelFit <- stm(documents = right_stmdfm$documents, vocab = right_stmdfm$vocab, K=50, max.em.its=500, seed=1234, data = right_stmdfm$meta, prevalence = ~ speaker + s(date_month))

#Finally, to save the estimated model to disk and then to load it:
save(rightmodelFit, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/right_modelfit")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/right_modelfit")

#To determine what the topics are:
labelTopics(rightmodelFit)
findThoughts(rightmodelFit, texts = right_stmdfm$meta$original_text, topics = 46, n = 20) #use this whenever we are unsure about a topic to examine 20 articles

#to export it as a csv so we can add it to the overall dataset:
write.csv(right_theta, "/Users/samueljohnston/Documents/R Code/GL and RRPs/right_data.csv")

#For a robustness check, we also run this for k=40 and 60 topics: 
rightmodelFit2 <- stm(documents = right_stmdfm$documents, vocab = right_stmdfm$vocab, K=40, max.em.its=500, seed=1234, data = right_stmdfm$meta, prevalence = ~ speaker + s(date_month))

save(rightmodelFit2, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/right_modelfit2")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/right_modelfit2")

labelTopics(rightmodelFit2)
findThoughts(rightmodelFit2, texts = right_stmdfm$meta$original_text, topics = 4, n = 20) #use this whenever we are unsure about a topic to examine 20 articles


rightmodelFit3 <- stm(documents = right_stmdfm$documents, vocab = right_stmdfm$vocab, K=60, max.em.its=500, seed=1234, data = right_stmdfm$meta, prevalence = ~ speaker + s(date_month))

save(rightmodelFit3, file = "/Users/samueljohnston/Documents/R Code/GL and RRPs/right_modelfit3")
load("/Users/samueljohnston/Documents/R Code/GL and RRPs/right_modelfit3")

labelTopics(rightmodelFit3)
findThoughts(rightmodelFit3, texts = right_stmdfm$meta$original_text, topics = 58, n = 20) #use this whenever we are unsure about a topic to examine 20 articles

#Now we find the thetas for the following topics: 4 (integration) and 46 (control) for k=50; 4 (integration) for k=40; and topics 46 (control) and 58 (integration) for k=60. Topics are sorted by month. We then add this to the dataset: 
right_theta_topic <- rightmodelFit$theta[,c(4, 46)] #get the thetas for the topics. K=50 first
right_theta <- aggregate(right_theta_topic, by = list(month = right_stmdfm$meta$date_month),FUN = mean)
right_theta <- right_theta[order(right_theta$month), ] 
right_theta$Integration <- right_theta$V1*100
right_theta$Control <- right_theta$V2*100 
write.csv(right_theta, "/Users/samueljohnston/Documents/R Code/GL and RRPs/right_data.csv")

right_theta_topic2 <- rightmodelFit2$theta[,c(4)] #K=40.
right_theta2 <- aggregate(right_theta_topic2, by = list(month = right_stmdfm$meta$date_month),FUN = mean)
right_theta2 <- right_theta2[order(right_theta2$month), ] 
right_theta2$Integration <- right_theta2$x*100
write.csv(right_theta2, "/Users/samueljohnston/Documents/R Code/GL and RRPs/right_data2.csv")

right_theta_topic3 <- rightmodelFit3$theta[,c(46, 58)] #K=60.
right_theta3 <- aggregate(right_theta_topic3, by = list(month = right_stmdfm$meta$date_month),FUN = mean)
right_theta3 <- right_theta3[order(right_theta3$month), ] 
right_theta3$Integration <- right_theta3$V2*100
right_theta3$Control <- right_theta3$V1*100 
write.csv(right_theta3, "/Users/samueljohnston/Documents/R Code/GL and RRPs/right_data3.csv")
```

```{r}
#This chunk is for validating the models (Please see the Supplementary Materials). We will assess topic quality (semantic coherence and exclusivity) using the topicQuality function to calculate both semantic coherence and exclusivity, as well as to plot these. We will start with GL:
png('/Users/samueljohnston/Documents/R Code/GL and RRPs/gltopicquality.png')
par(mfrow=c(3, 1))
topicQuality(model = modelFit1, documents = gl_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(modelFit1$theta), M = 15) #K=40
topicQuality(model = modelFit3, documents = gl_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(modelFit3$theta), M = 15) #K=50
topicQuality(model = modelFit2, documents = gl_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(modelFit2$theta), M = 15) #K=60
dev.off()

#Now for RRPs.
png('/Users/samueljohnston/Documents/R Code/GL and RRPs/rrtopicquality.png')
par(mfrow=c(3, 1))
topicQuality(model = rrmodelFit2, documents = rr_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(rrmodelFit2$theta), M = 15) #K=40
topicQuality(model = rrmodelFit, documents = rr_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(rrmodelFit$theta), M = 15) #K=50
topicQuality(model = rrmodelFit3, documents = rr_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(rrmodelFit3$theta), M = 15) #K=60
dev.off()

#Now for left parties.
png('/Users/samueljohnston/Documents/R Code/GL and RRPs/lefttopicquality.png')
par(mfrow=c(3, 1))
topicQuality(model = leftmodelFit2, documents = left_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(leftmodelFit2$theta), M = 15) #K=40
topicQuality(model = leftmodelFit, documents = left_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(leftmodelFit$theta), M = 15) #K=50
topicQuality(model = leftmodelFit3, documents = left_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(leftmodelFit3$theta), M = 15) #K=60
dev.off()

#Now for right parties.
png('/Users/samueljohnston/Documents/R Code/GL and RRPs/righttopicquality.png')
par(mfrow=c(3, 1))
topicQuality(model = rightmodelFit2, documents = right_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(rightmodelFit2$theta), M = 15) #K=40
topicQuality(model = rightmodelFit, documents = right_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(rightmodelFit$theta), M = 15) #K=50
topicQuality(model = rightmodelFit3, documents = right_stmdfm$documents, xlab = "Semantic Coherence", ylab = "Exclusivity",
             labels = 1:ncol(rightmodelFit3$theta), M = 15) #K=60
dev.off()
```


```{r}
#This chunk is for the various different models we ran to test this, which results in Table 2. First, let's open the data:
library(foreign)
final_data <- read.csv("/Users/samueljohnston/Documents/R Code/GL and RRPs/Data.csv")


#The first model examines overall immigration saliency, the second integration saliency, and the third control saliency: 
Model1 <- plm(GL.Forty.Immigration ~ RRP.Vote.Share2 + RRP.Sixty.Immigration + Left.Sixty.Immigration + Right.Sixty.Immigration + Lagged.Forty.Immigration + Cabinet + Effective.Number.of.Parties + Unemployment + Immigration + Economic.Growth, data = final_data, model = "within", index = "Year")

pdwtest(Model1) #This runs a Durbin-Watson test for autocorrelation, and it shows that this is not an issue as we fail to reject the null hypothesis. 

#Now to test for autocorrelation in a model that does not include the lagged DV:
Model1B <- plm(GL.Forty.Immigration ~ RRP.Vote.Share2 + RRP.Sixty.Immigration + Left.Sixty.Immigration + Right.Sixty.Immigration + Cabinet + Effective.Number.of.Parties + Unemployment + Immigration + Economic.Growth, data = final_data, model = "within", index = "Year")

pdwtest(Model1B)

bptest(GL.Forty.Immigration ~ RRP.Vote.Share2 + RRP.Sixty.Immigration + Left.Sixty.Immigration + Lagged.Forty.Immigration + Cabinet + Effective.Number.of.Parties + Unemployment + Immigration + Economic.Growth + factor(Year), data = final_data, studentize = F) #This runs a Breusch-Pagan test, which shows that we do need to use robust standard errors for heteroskedasticity.


#The multilevel models, where level 1 is speaker, and level 2 is year: 
final_data$Immigration <- scale(final_data$Immigration, scale = TRUE) 

modelMLM1 <- lmer(GL.Forty.Immigration ~ RRP.Vote.Share2 + RRP.Sixty.Immigration + Left.Sixty.Immigration + Right.Sixty.Immigration + Lagged.Forty.Immigration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data)


modelMLM2 <- lmer(GL.Forty.Integration ~ RRP.Vote.Share2 + RRP.Sixty.Integration + Left.Sixty.Integration + Right.Sixty.Integration + Lagged.Forty.Integration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + + (1 | Speaker/Year), data = final_data)

modelMLM3 <- lmer(GL.Forty.Control ~ RRP.Vote.Share2 + RRP.Sixty.Control + Left.Sixty.Control + Right.Sixty.Control + Lagged.Forty.Control + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data)

stargazer(modelMLM1, modelMLM2, modelMLM3, title = "The Effect of RRPs and Left-Wing Parties on Immigration Saliency in GL Speeches", dep.var.labels = c("GL Immigration Saliency", "GL Integration Saliency", "GL Control Saliency"), no.space = TRUE, style = "apsr", star.char = c("*", "**"), star.cutoffs = c(.05, .01), notes = c("*p < .05; **p < .01"), notes.append = FALSE, type = "text", out = "modelsMLM.txt")

library(car)
vif(modelMLM1)
vif(modelMLM2)
vif(modelMLM3)
```


```{r}
#Descriptive statistics (please see the Supplementary Materials)
#Descriptive statistics for all variables
summary(final_data$GL.Forty.Immigration)
sd(final_data$GL.Forty.Immigration)
summary(final_data$GL.Forty.Integration)
sd(final_data$GL.Forty.Integration)
summary(final_data$GL.Forty.Control)
sd(final_data$GL.Forty.Control)
summary(final_data$RRP.Vote.Share2)
sd(final_data$RRP.Vote.Share2)
summary(final_data$RRP.Sixty.Immigration)
sd(final_data$RRP.Sixty.Immigration)
summary(final_data$RRP.Sixty.Integration)
sd(final_data$RRP.Sixty.Integration)
summary(final_data$RRP.Sixty.Control)
sd(final_data$RRP.Sixty.Control)
summary(final_data$Left.Sixty.Immigration)
sd(final_data$Left.Sixty.Immigration)
summary(final_data$Left.Sixty.Integration)
sd(final_data$Left.Sixty.Integration)
summary(final_data$Left.Sixty.Control)
sd(final_data$Left.Sixty.Control)
summary(final_data$Right.Sixty.Immigration)
sd(final_data$Right.Sixty.Immigration)
summary(final_data$Right.Sixty.Integration)
sd(final_data$Right.Sixty.Integration)
summary(final_data$Right.Sixty.Control)
sd(final_data$Right.Sixty.Control)
summary(final_data$Lagged.Forty.Immigration)
sd(final_data$Lagged.Forty.Immigration)
summary(final_data$Lagged.Forty.Integration)
sd(final_data$Lagged.Forty.Integration)
summary(final_data$Lagged.Forty.Control)
sd(final_data$Lagged.Forty.Control)
summary(final_data$Cabinet)
sd(final_data$Cabinet)
summary(final_data$Effective.Number.of.Parties)
sd(final_data$Effective.Number.of.Parties)
summary(final_data$Immigration)
sd(final_data$Immigration)
summary(final_data$Economic.Growth)
sd(final_data$Economic.Growth)
summary(final_data$Unemployment, na.rm = TRUE)
sd(final_data$Unemployment, na.rm = TRUE)


#In order to plot the GL descriptives, we first identify the top 10 speakers in the dataset, and subset the data in order just to examine them.
table(final_data$Speaker) #To find the top 10 speakers
data <- final_data %>% filter(final_data$Speaker %in% c("Van Gent", "Vendrik", "Halsema", "Duyvendak", "Azough", "Dibi", "Klaver", "Van Tongeren", "Voortman", "Van Ojik"))

#Now for the plots:
plot1 <- ggplot(data, aes(x = Year, y = GL.Forty.Immigration, colour = Speaker)) +
  labs(x = "Year", y = "GL Immigration Saliency", title = "Figure 5: Trends in GL Immigration Saliency Over Time, by Speaker") +
  geom_point()

pdf('/Users/samueljohnston/Documents/R Code/GL and RRPs/gldescriptives.pdf') #These three lines are for exporting the plots. 
plot1
dev.off()

plot2 <- ggplot(data, aes(x = Year, y = GL.Forty.Integration, colour = Speaker)) +
  labs(x = "Year", y = "GL Integration Saliency") +
  geom_point()   

pdf('/Users/samueljohnston/Documents/R Code/GL and RRPs/gldescriptives2.pdf') 
plot2
dev.off()

plot3 <- ggplot(data, aes(x = Year, y = GL.Forty.Control, colour = Speaker)) +
  labs(x = "Year", y = "GL Control Saliency") +
  geom_point()

pdf('/Users/samueljohnston/Documents/R Code/GL and RRPs/gldescriptives3.pdf') #These three lines are for exporting the plots. 
plot3
dev.off()


#Now to create a descriptive plot for the main dependent and explanatory variables (immigration saliency for GL and RRPs, and RRP vote share)
descriptives <- descriptives <- read.csv("/Users/samueljohnston/Documents/R Code/GL and RRPs/Descriptives.csv")

descriptives_long <- melt(descriptives, id = "Month")  # convert to long format

plotA <- ggplot(descriptives_long, aes(x = Month, y = value, group = variable)) +
       geom_line(aes(colour = variable)) +
       scale_x_discrete(name = "Month", breaks = c("2002_5_01", "2003_01_01", "2004_01_01", "2005_01_01", "2006_01_01", "2007_01_01", "2008_01_01", "2009_01_01", "2010_01_01", "2011_01_01", "2012_01_01", "2013_01_01", "2014_01_01", "2015_01_01", "2016_01_01", "2017_01_01", "2018_01_01", "2019_07_01")) +
       scale_y_discrete(name = "Value (%)") +
       labs (title = "Figure 1: Trends in Immigration Saliency and RRP Vote Share, 2002-19", colour = "Variable") +
       scale_color_hue(labels = c("GL Average Immigration Saliency", "RRP Vote Share", "RRP Immigration Saliency"))

pdf('/Users/samueljohnston/Documents/R Code/GL and RRPs/descriptives.pdf') #These three lines are for exporting the plots. 
plotA
dev.off()
```


```{r}
#ROBUSTNESS CHECKS (please see the Supplementary Materials)
#Firstly, we will run a model when k=50 for all 4 corpora. Since we only found an integration topic in the left corpus at k=50, we will only run an integration model. 
Model4 <- lmer(Integration ~ RRP.Vote.Share2 + RR.Integration + Left.Integration + Right.Integration + Lagged.Integration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data)

stargazer(Model4, title = "The Effect of RRPs and Left-Wing Parties on Immigration Saliency in GL Speeches", dep.var.labels = c("GL Integration Saliency"),   no.space = TRUE, style = "apsr", star.char = c("*", "**"), star.cutoffs = c(.05, .01), notes = c("*p < .05; **p < .01"), notes.append = FALSE, type = "text", out = "models2.txt")


#Secondly, we will run models where k=60 for all 4 corpora.
Model5 <- lmer(GL.Sixty.Immigration ~ RRP.Vote.Share2 + RRP.Sixty.Immigration + Left.Sixty.Immigration + Right.Sixty.Immigration + Lagged.Sixty.Immigration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data)

Model6 <- lmer(GL.Sixty.Integration ~ RRP.Vote.Share2 + RRP.Sixty.Integration + Left.Sixty.Integration + Right.Sixty.Integration + Lagged.Sixty.Integration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data)

Model7 <- lmer(GL.Sixty.Control ~ RRP.Vote.Share2 + RRP.Sixty.Control + Left.Sixty.Control + Right.Sixty.Control + Lagged.Sixty.Control + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data)

stargazer(Model5, Model6, Model7, title = "The Effect of RRPs and Left-Wing Parties on Immigration Saliency in GL Speeches", dep.var.labels = c("GL Immigration Saliency", "GL Integration Saliency", "GL Control Saliency"),   no.space = TRUE, style = "apsr", star.char = c("*", "**"), star.cutoffs = c(.05, .01), notes = c("*p < .05; **p < .01"), notes.append = FALSE, type = "text", out = "models3.txt")


#Thirdly, we replace RRP vote share with a variable that only includes Tweede Kamer election results: 
Model8 <- lmer(GL.Forty.Immigration ~ RR.Vote.Share + RRP.Sixty.Immigration + Left.Sixty.Immigration + Right.Sixty.Immigration + Lagged.Forty.Immigration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data)

Model9 <- lmer(GL.Forty.Integration ~ RR.Vote.Share + RRP.Sixty.Integration + Left.Sixty.Integration + Right.Sixty.Integration + Lagged.Forty.Integration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data)

Model10 <- lmer(GL.Forty.Control ~ RR.Vote.Share + RRP.Sixty.Control + Left.Sixty.Control + Right.Sixty.Control + Lagged.Forty.Control + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data)

stargazer(Model8, Model9, Model10, title = "The Effect of RRPs and Left-Wing Parties on Immigration Saliency in GL Speeches", dep.var.labels = c("GL Immigration Saliency", "GL Integration Saliency", "GL Control Saliency"),   no.space = TRUE, style = "apsr", star.char = c("*", "**"), star.cutoffs = c(.05, .01), notes = c("*p < .05; **p < .01"), notes.append = FALSE, type = "text", out = "models4.txt")


#Fourthly, we add controls for RRP government involvement and disproportionality:
Model11 <- lmer(GL.Forty.Immigration ~ RRP.Vote.Share2 + RRP.Sixty.Immigration + Left.Sixty.Immigration + Right.Sixty.Immigration + Lagged.Forty.Immigration + Cabinet + RR.Government.Involvement + Disproportionality + Effective.Number.of.Parties + Unemployment + Immigration + Economic.Growth + (1 | Speaker/Year), data = final_data)

Model12 <- lmer(GL.Forty.Integration ~ RRP.Vote.Share2 + RRP.Sixty.Integration + Left.Sixty.Integration + Right.Sixty.Integration + Lagged.Forty.Integration + Cabinet + RR.Government.Involvement + Disproportionality + Effective.Number.of.Parties + Unemployment + Immigration + Economic.Growth + (1 | Speaker/Year), data = final_data)

Model13 <- lmer(GL.Forty.Control ~ RRP.Vote.Share2 + RRP.Sixty.Control + Left.Sixty.Control + Right.Sixty.Control + Lagged.Forty.Control + Cabinet + RR.Government.Involvement + Disproportionality + Effective.Number.of.Parties + Unemployment + Immigration + Economic.Growth + (1 | Speaker/Year), data = final_data)

stargazer(Model11, Model12, Model13, title = "The Effect of RRPs and Left-Wing Parties on Immigration Saliency in GL Speeches", dep.var.labels = c("GL Immigration Saliency", "GL Integration Saliency", "GL Control Saliency"),   no.space = TRUE, style = "apsr", star.char = c("*", "**"), star.cutoffs = c(.05, .01), notes = c("*p < .05; **p < .01"), notes.append = FALSE, type = "text", out = "models5.txt")


#Fifthly, we will subset the data to only include those months for which we have aggregated polling data on RRP support, and use this as a measure of RRP strength instead of election results: 
final_data2 <- final_data %>% filter(final_data$Month >= "2012-10-01")

final_data2$Immigration <- scale(final_data2$Immigration, scale = TRUE) 

modelMLM14 <- lmer(GL.Forty.Immigration ~ RRP.Polling + RRP.Sixty.Immigration + Left.Sixty.Immigration + Right.Sixty.Immigration + Lagged.Forty.Immigration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data2)

modelMLM15 <- lmer(GL.Forty.Integration ~ RRP.Polling + RRP.Sixty.Integration + Left.Sixty.Integration + Right.Sixty.Integration + Lagged.Forty.Integration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data2)

modelMLM16 <- lmer(GL.Forty.Control ~ RRP.Polling + RRP.Sixty.Control + Left.Sixty.Control + Right.Sixty.Control + Lagged.Forty.Control + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + (1 | Speaker/Year), data = final_data2)

stargazer(modelMLM14, modelMLM15, modelMLM16, title = "The Effect of RRPs and Left-Wing Parties on Immigration Saliency in GL Speeches", dep.var.labels = c("GL Immigration Saliency", "GL Integration Saliency", "GL Control Saliency"), no.space = TRUE, style = "apsr", star.char = c("*", "**"), star.cutoffs = c(.05, .01), notes = c("*p < .05; **p < .01"), notes.append = FALSE, type = "text", out = "models6.txt")


#Finally, we add a measure of public opinion on immigration, which is proxied for by the number of major protests concerning immigration in a given year.
modelMLM17 <- lmer(GL.Forty.Immigration ~ RRP.Vote.Share2 + RRP.Sixty.Immigration + Left.Sixty.Immigration + Right.Sixty.Immigration + Lagged.Forty.Immigration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + Immigration.Protest.Incidence + (1 | Speaker/Year), data = final_data)


modelMLM18 <- lmer(GL.Forty.Integration ~ RRP.Vote.Share2 + RRP.Sixty.Integration + Left.Sixty.Integration + Right.Sixty.Integration + Lagged.Forty.Integration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + Immigration.Protest.Incidence + (1 | Speaker/Year), data = final_data)

modelMLM19 <- lmer(GL.Forty.Control ~ RRP.Vote.Share2 + RRP.Sixty.Control + Left.Sixty.Control + Right.Sixty.Control + Lagged.Forty.Control + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + Immigration.Protest.Incidence + (1 | Speaker/Year), data = final_data)


modelMLM20 <- lmer(GL.Forty.Immigration ~ RRP.Vote.Share2 + RRP.Sixty.Immigration + Left.Sixty.Immigration + Right.Sixty.Immigration + Lagged.Forty.Immigration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + Combined.Protest.Incidence + (1 | Speaker/Year), data = final_data)


modelMLM21 <- lmer(GL.Forty.Integration ~ RRP.Vote.Share2 + RRP.Sixty.Integration + Left.Sixty.Integration + Right.Sixty.Integration + Lagged.Forty.Integration + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + Combined.Protest.Incidence + (1 | Speaker/Year), data = final_data)

modelMLM22 <- lmer(GL.Forty.Control ~ RRP.Vote.Share2 + RRP.Sixty.Control + Left.Sixty.Control + Right.Sixty.Control + Lagged.Forty.Control + Cabinet + Effective.Number.of.Parties + Immigration + Economic.Growth + Unemployment + Combined.Protest.Incidence + (1 | Speaker/Year), data = final_data)

stargazer(modelMLM17, modelMLM18, modelMLM19, modelMLM20, modelMLM21, modelMLM22, title = "The Effect of RRPs and Left-Wing Parties on Immigration Saliency in GL Speeches", dep.var.labels = c("GL Immigration Saliency", "GL Integration Saliency", "GL Control Saliency"), no.space = TRUE, style = "apsr", star.char = c("*", "**"), star.cutoffs = c(.05, .01), notes = c("*p < .05; **p < .01"), notes.append = FALSE, type = "text", out = "models7.txt")
```

